The following code digs into the chronological clustering of both the community size spectrum slopes, and the mean sizes across all years of the NMFS groundfish survey.
Data is prepared and updated using {targets} to ensure a consistent data state and a reproducible workflow.
Target Data
Data for the report comes directly from the {targets} workflow found in _targets.R.
#### Data ####
#### Size Spectrum Targets
# SS and manual bins together
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(size_spectrum_indices))
#### OISST Data
# OISST for all the regions
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(regional_oisst))
#### Size Data Targets
# 1. Biological data used as input
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(nefsc_1g))
# 2. Mean sizes grouped on - yr, season, survey area, taxon group, fishery status
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(mean_individual_sizes))
# 3. Mean sizes using same groupings as the size spectrum indices
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(mean_sizes_ss_groups))
# Cut up discrete length and weight bins
nefsc_size_bins <- nefsc_1g %>%
mutate(
length_bin = case_when(
LngtClass <= 5 ~ "0 - 5cm",
LngtClass <= 10 ~ "5 - 10cm",
LngtClass <= 25 ~ "10 - 25cm",
LngtClass <= 50 ~ "25 - 50cm",
LngtClass <= 100 ~ "50 - 100cm",
LngtClass >= 100 ~ ">= 100cm"),
length_bin = factor(length_bin, levels = c(
"0 - 5cm", "5 - 10cm", "10 - 25cm",
"25 - 50cm", "50 - 100cm", ">= 100cm")))
# Weight bins
nefsc_size_bins <- nefsc_size_bins %>%
mutate(
weight_bin = case_when(
ind_weight_kg <= 0.001 ~ "0 - 1g",
ind_weight_kg <= 0.005 ~ "1 - 5g",
ind_weight_kg <= 0.010 ~ "5 - 10g",
ind_weight_kg <= 0.050 ~ "10 - 50g",
ind_weight_kg <= 0.100 ~ "50 - 100g",
ind_weight_kg <= 0.500 ~ "100 - 500g",
ind_weight_kg <= 1.000 ~ ".5 - 1kg",
ind_weight_kg <= 5.000 ~ "1 - 5kg",
ind_weight_kg <= 10.00 ~ "5 - 10kg",
ind_weight_kg >= 10.00 ~ ">= 10kg" ),
weight_bin = factor(weight_bin, levels = c(
"0 - 1g", "1 - 5g", "5 - 10g", "10 - 50g",
"50 - 100g", "100 - 500g", ".5 - 1kg",
"1 - 5kg", "5 - 10kg", ">= 10kg")))
# Rename the functional groups
nefsc_size_bins <- nefsc_size_bins %>%
mutate(
spec_class = case_when(
spec_class == "gf" ~ "Groundfish",
spec_class == "pel" ~ "Pelagic",
spec_class == "dem" ~ "Demersal",
spec_class == "el" ~ "Elasmobranch",
spec_class == "<NA>" ~ "NA"
))
# Make regions go N->S
nefsc_size_bins <- nefsc_size_bins %>%
mutate(
survey_area = factor(survey_area, levels = c("GoM", "GB", "SNE", "MAB")
))
# Do some grouping
regional_totals <- nefsc_size_bins %>%
group_by(Year, survey_area) %>%
summarise(total_survey_catch = sum(Number),
total_lw_bio = sum(sum_weight_kg),
total_strat_abund = sum(expanded_abund_s),
total_strat_lw_bio = sum(sum_weight_kg)) %>%
ungroup()
# Get fraction for different length/weight classes
# length bins
regional_lengths <- nefsc_size_bins %>%
group_by(Year, survey_area, length_bin) %>%
summarise(lenbin_survey_catch = sum(Number),
lenbin_lw_bio = sum(sum_weight_kg),
lenbin_strat_abund = sum(expanded_abund_s),
lenbin_strat_lw_bio = sum(stratified_sum_bodymass)) %>%
left_join(regional_totals) %>%
mutate(
perc_total_catch = (lenbin_survey_catch - total_survey_catch) * 100,
perc_lw_bio = (lenbin_lw_bio - total_lw_bio) * 100,
perc_strat_abund = (lenbin_strat_abund - total_strat_abund) * 100,
perc_strat_lw_bio = (lenbin_strat_lw_bio - total_strat_lw_bio) * 100)
# # Biomass from Survey using L-W
# ggplot(regional_lengths, aes(Year, lenbin_survey_catch, fill = length_bin)) +
# geom_col(position = "stack") +
# facet_wrap(~survey_area) +
# scale_fill_gmri() +
# scale_y_continuous(labels = comma_format()) +
# labs(y = "Survey Catch",
# subtitle = "Total Survey Abundance",
# x = "",
# fill = "Length Bin")
# Expected Biomass for Entire Survey Area
ggplot(regional_lengths,
aes(Year, lenbin_strat_abund, fill = length_bin)) +
geom_col(position = "stack") +
facet_wrap(~survey_area) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Predicted Abundance - Full Survey Area",
title = "Abundance",
subtitle = "Expected Area-Stratified Abundance",
x = "",
fill = "Length Bin")
# # Biomass from Survey using L-W
# ggplot(regional_lengths, aes(Year, lenbin_lw_bio, fill = length_bin)) +
# geom_col(position = "stack") +
# facet_wrap(~survey_area) +
# scale_fill_gmri() +
# scale_y_continuous(labels = comma_format()) +
# labs(y = "Survey Biomass (kg)",
# subtitle = "Survey Biomass",
# x = "",
# fill = "Length Bin")
# Expected Biomass for Entire Survey Area
ggplot(regional_lengths,
aes(Year, lenbin_strat_lw_bio, fill = length_bin)) +
geom_col(position = "stack") +
facet_wrap(~survey_area) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Length Bin")+ theme(legend.position = "none")
# weight bins
regional_weights <- nefsc_size_bins %>%
group_by(Year, survey_area, weight_bin) %>%
summarise(wtbin_survey_catch = sum(Number),
wtbin_lw_bio = sum(sum_weight_kg),
wtbin_strat_abund = sum(expanded_abund_s),
wtbin_strat_lw_bio = sum(sum_weight_kg)) %>%
left_join(regional_totals) %>%
mutate(
perc_total_catch = (wtbin_survey_catch / total_survey_catch) * 100,
perc_lw_bio = (wtbin_lw_bio / total_lw_bio) * 100,
perc_strat_abund = (wtbin_strat_abund / total_strat_abund) * 100,
perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)
# # Biomass from Survey using L-W
# ggplot(regional_weights, aes(Year, wtbin_survey_catch, fill = weight_bin)) +
# geom_col(position = "stack") +
# facet_wrap(~survey_area) +
# scale_fill_gmri() +
# scale_y_continuous(labels = comma_format()) +
# labs(y = "Survey Catch",
# subtitle = "Total Survey Abundance",
# x = "",
# fill = "Length Bin")
# Expected Biomass for Entire Survey Area
ggplot(regional_weights,
aes(Year, wtbin_strat_abund, fill = weight_bin)) +
geom_col(position = "stack") +
facet_wrap(~survey_area) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Predicted Abundance - Full Survey Area",
title = "Abundance",
subtitle = "Expected Area-Stratified Abundance" ,
x = "",
fill = "Weight Bin")
# # Biomass from Survey using L-W
# ggplot(regional_weights,
# aes(Year, wtbin_lw_bio, fill = weight_bin)) +
# geom_col(position = "stack") +
# facet_wrap(~survey_area) +
# scale_fill_gmri() +
# scale_y_continuous(labels = comma_format()) +
# labs(y = "Survey Biomass (kg)", subtitle = "Survey Biomass",
# x = "",
# fill = "Length Bin")
# Expected Biomass for Entire Survey Area
ggplot(regional_weights,
aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
geom_col(position = "stack") +
facet_wrap(~survey_area) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Weight Bin") + theme(legend.position = "none")
# Do some grouping
seasonal_totals <- nefsc_size_bins %>%
group_by(Year, season) %>%
summarise(total_survey_catch = sum(Number),
total_lw_bio = sum(sum_weight_kg),
total_strat_abund = sum(expanded_abund_s),
total_strat_lw_bio = sum(sum_weight_kg)) %>%
ungroup()
# Get fraction for different length/weight classes
# length bins
seasonal_lengths <- nefsc_size_bins %>%
group_by(Year, season, length_bin) %>%
summarise(lenbin_survey_catch = sum(Number),
lenbin_lw_bio = sum(sum_weight_kg),
lenbin_strat_abund = sum(expanded_abund_s),
lenbin_strat_lw_bio = sum(sum_weight_kg)) %>%
left_join(seasonal_totals) %>%
mutate(
perc_total_catch = (lenbin_survey_catch - total_survey_catch) * 100,
perc_lw_bio = (lenbin_lw_bio - total_lw_bio) * 100,
perc_strat_abund = (lenbin_strat_abund - total_strat_abund) * 100,
perc_strat_lw_bio = (lenbin_strat_lw_bio - total_strat_lw_bio) * 100)
# # Biomass from Survey using L-W
# ggplot(seasonal_lengths, aes(Year, lenbin_survey_catch, fill = length_bin)) +
# geom_col(position = "stack") +
# facet_wrap(~season) +
# scale_fill_gmri() +
# scale_y_continuous(labels = comma_format()) +
# labs(y = "Survey Catch",
# subtitle = "Total Survey Abundance",
# x = "",
# fill = "Length Bin")
# Expected Biomass for Entire Survey Area
ggplot(seasonal_lengths,
aes(Year, lenbin_strat_abund, fill = length_bin)) +
geom_col(position = "stack") +
facet_wrap(~season) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Predicted Abundance - Full Survey Area",
title = "Abundance",
subtitle = "Expected Area-Stratified Abundance" ,
x = "",
fill = "Length Bin")
# # Biomass from Survey using L-W
# ggplot(seasonal_lengths,
# aes(Year, lenbin_lw_bio, fill = length_bin)) +
# geom_col(position = "stack") +
# facet_wrap(~season) +
# scale_fill_gmri() +
# scale_y_continuous(labels = comma_format()) +
# labs(y = "Survey Biomass (kg)", subtitle = "Survey Biomass",
# x = "",
# fill = "Length Bin")
# Expected Biomass for Entire Survey Area
ggplot(seasonal_lengths,
aes(Year, lenbin_strat_lw_bio, fill = length_bin)) +
geom_col(position = "stack") +
facet_wrap(~season) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Length Bin") + theme(legend.position = "none")
# weight bins
seasonal_weights <- nefsc_size_bins %>%
group_by(Year, season, weight_bin) %>%
summarise(wtbin_survey_catch = sum(Number),
wtbin_lw_bio = sum(sum_weight_kg),
wtbin_strat_abund = sum(expanded_abund_s),
wtbin_strat_lw_bio = sum(sum_weight_kg)) %>%
left_join(seasonal_totals) %>%
mutate(
perc_total_catch = (wtbin_survey_catch / total_survey_catch) * 100,
perc_lw_bio = (wtbin_lw_bio / total_lw_bio) * 100,
perc_strat_abund = (wtbin_strat_abund / total_strat_abund) * 100,
perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)
# # Biomass from Survey using L-W
# ggplot(seasonal_weights, aes(Year, wtbin_survey_catch, fill = weight_bin)) +
# geom_col(position = "stack") +
# facet_wrap(~season) +
# scale_fill_gmri() +
# scale_y_continuous(labels = comma_format()) +
# labs(y = "Survey Catch",
# subtitle = "Total Survey Abundance",
# x = "",
# fill = "Length Bin")
# Expected Biomass for Entire Survey Area
ggplot(seasonal_weights,
aes(Year, wtbin_strat_abund, fill = weight_bin)) +
geom_col(position = "stack") +
facet_wrap(~season) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Predicted Abundance - Full Survey Area",
title = "Abundance",
subtitle = "Expected Area-Stratified Abundance" ,
x = "",
fill = "Weight Bin")
# # Biomass from Survey using L-W
# ggplot(seasonal_weights,
# aes(Year, wtbin_lw_bio, fill = weight_bin)) +
# geom_col(position = "stack") +
# facet_wrap(~season) +
# scale_fill_gmri() +
# scale_y_continuous(labels = comma_format()) +
# labs(y = "Survey Biomass (kg)", subtitle = "Survey Biomass",
# x = "",
# fill = "Length Bin")
# Expected Biomass for Entire Survey Area
ggplot(seasonal_weights,
aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
geom_col(position = "stack") +
facet_wrap(~season) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Weight Bin") + theme(legend.position = "none")
# Do some grouping
fgroup_totals <- nefsc_size_bins %>%
group_by(Year, spec_class) %>%
summarise(total_survey_catch = sum(Number),
total_lw_bio = sum(sum_weight_kg),
total_strat_abund = sum(expanded_abund_s),
total_strat_lw_bio = sum(sum_weight_kg)) %>%
ungroup()
# Display table of whichever specis fall into the categories
nefsc_size_bins %>%
distinct(SpecCode, scientific_name, spec_class) %>%
rename(`Common Name` = SpecCode,
`Scientific Name` = scientific_name,
`Functional Group` = spec_class) %>%
arrange(`Functional Group`, `Common Name`) %>%
DT::datatable(filter = "top", rownames = FALSE)
# Get fraction for different length/weight classes
# length bins
fgroup_lengths <- nefsc_size_bins %>%
group_by(Year, spec_class, length_bin) %>%
summarise(lenbin_survey_catch = sum(Number),
lenbin_lw_bio = sum(sum_weight_kg),
lenbin_strat_abund = sum(expanded_abund_s),
lenbin_strat_lw_bio = sum(sum_weight_kg)) %>%
left_join(fgroup_totals) %>%
mutate(
perc_total_catch = (lenbin_survey_catch - total_survey_catch) * 100,
perc_lw_bio = (lenbin_lw_bio - total_lw_bio) * 100,
perc_strat_abund = (lenbin_strat_abund - total_strat_abund) * 100,
perc_strat_lw_bio = (lenbin_strat_lw_bio - total_strat_lw_bio) * 100)
# # Biomass from Survey using L-W
# ggplot(fgroup_lengths, aes(Year, lenbin_survey_catch, fill = length_bin)) +
# geom_col(position = "stack") +
# facet_wrap(~spec_class) +
# scale_fill_gmri() +
# scale_y_continuous(labels = comma_format()) +
# labs(y = "Survey Catch",
# subtitle = "Total Survey Abundance",
# x = "",
# fill = "Length Bin")
# Expected Biomass for Entire Survey Area
ggplot(fgroup_lengths,
aes(Year, lenbin_strat_abund, fill = length_bin)) +
geom_col(position = "stack") +
facet_wrap(~spec_class) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Predicted Abundance - Full Survey Area",
title = "Abundance",
subtitle = "Expected Area-Stratified Abundance" ,
x = "",
fill = "Length Bin")
# # Biomass from Survey using L-W
# ggplot(fgroup_lengths,
# aes(Year, lenbin_lw_bio, fill = length_bin)) +
# geom_col(position = "stack") +
# facet_wrap(~spec_class) +
# scale_fill_gmri() +
# scale_y_continuous(labels = comma_format()) +
# labs(y = "Survey Biomass (kg)", subtitle = "Survey Biomass",
# x = "",
# fill = "Length Bin")
# Expected Biomass for Entire Survey Area
ggplot(fgroup_lengths,
aes(Year, lenbin_strat_lw_bio, fill = length_bin)) +
geom_col(position = "stack") +
facet_wrap(~spec_class) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Length Bin")
# weight bins
fgroup_weigths <- nefsc_size_bins %>%
group_by(Year, spec_class, weight_bin) %>%
summarise(wtbin_survey_catch = sum(Number),
wtbin_lw_bio = sum(sum_weight_kg),
wtbin_strat_abund = sum(expanded_abund_s),
wtbin_strat_lw_bio = sum(sum_weight_kg)) %>%
left_join(fgroup_totals) %>%
mutate(
perc_total_catch = (wtbin_survey_catch / total_survey_catch) * 100,
perc_lw_bio = (wtbin_lw_bio / total_lw_bio) * 100,
perc_strat_abund = (wtbin_strat_abund / total_strat_abund) * 100,
perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)
# # Biomass from Survey using L-W
# ggplot(fgroup_weigths, aes(Year, wtbin_survey_catch, fill = weight_bin)) +
# geom_col(position = "stack") +
# facet_wrap(~spec_class) +
# scale_fill_gmri() +
# scale_y_continuous(labels = comma_format()) +
# labs(y = "Survey Catch",
# subtitle = "Total Survey Abundance",
# x = "",
# fill = "Weight Bin")
# Expected Biomass for Entire Survey Area
ggplot(fgroup_weigths,
aes(Year, wtbin_strat_abund, fill = weight_bin)) +
geom_col(position = "stack") +
facet_wrap(~spec_class) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Predicted Abundance - Full Survey Area",
title = "Abundance",
subtitle = "Expected Area-Stratified Abundance",
x = "",
fill = "Weight Bin")
# # Biomass from Survey using L-W
# ggplot(fgroup_weigths,
# aes(Year, wtbin_lw_bio, fill = weight_bin)) +
# geom_col(position = "stack") +
# facet_wrap(~spec_class) +
# scale_fill_gmri() +
# scale_y_continuous(labels = comma_format()) +
# labs(y = "Survey Biomass (kg)",
# subtitle = "Survey Biomass",
# x = "",
# fill = "Weight Bin")
# Expected Biomass for Entire Survey Area
ggplot(fgroup_weigths,
aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
geom_col(position = "stack") +
facet_wrap(~spec_class) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Weight Bin")
Community size spectrum slopes were estimated using 2 methods for comparison, using a minimum individual biomass of 1 gram, and using area stratified abundances.
The first was using the methods of the {sizeSpectra} package which were shown to be the most accurate when using simulated data.
The second method uses binned abundances, with bins of width 0.5 on a log10 scale, so 10^0 - 10^0.5 etc. These bins were then normalized by dividing the abundances by the bin width to account for the increasing bin size.
Results from Both Methods
# Pull the group ID for the slopes grouped on year, season, and region
warmem_group_slopes <- size_spectrum_indices %>%
filter(`group ID`== "single years * season * region") %>%
mutate(season = fct_rev(season),
survey_area = factor(survey_area,
levels = c("GoM", "GB", "SNE", "MAB")),
yr = as.numeric(as.character(Year)))
The Bodymass Data to Match
# Do some grouping
warmem_totals <- nefsc_size_bins %>%
group_by(Year, survey_area, season) %>%
summarise(total_survey_catch = sum(Number),
total_lw_bio = sum(sum_weight_kg),
total_strat_abund = sum(expanded_abund_s),
total_strat_lw_bio = sum(sum_weight_kg)) %>%
ungroup()
# weight bins
warmem_weights <- nefsc_size_bins %>%
group_by(Year, survey_area, season, weight_bin) %>%
summarise(wtbin_survey_catch = sum(Number),
wtbin_lw_bio = sum(sum_weight_kg),
wtbin_strat_abund = sum(expanded_abund_s),
wtbin_strat_lw_bio = sum(sum_weight_kg)) %>%
left_join(warmem_totals) %>%
mutate(
perc_total_catch = (wtbin_survey_catch / total_survey_catch) * 100,
perc_lw_bio = (wtbin_lw_bio / total_lw_bio) * 100,
perc_strat_abund = (wtbin_strat_abund / total_strat_abund) * 100,
perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)
# # Expected Abundance for Entire Survey Area
# ggplot(warmem_weights,
# aes(Year, wtbin_strat_abund, fill = weight_bin)) +
# geom_col(position = "stack") +
# facet_grid(season~survey_area) +
# scale_fill_gmri() +
# scale_y_continuous(labels = comma_format()) +
# labs(y = "Total Predicted Abundance - Full Survey Area",
# subtitle = "Expected Area-Stratified Abundance" ,
# x = "",
# fill = "Length Bin")
# Expected Biomass for Entire Survey Area
ggplot(warmem_weights,
aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
geom_col(position = "stack") +
facet_grid(season~survey_area) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Length Bin")
The Edwards methodology differs from the other methods for estimating size spectra by avoiding the subjective decisions around how to bin data prior to fitting the log-linear regression.
Instead, Edwards uses the individual size distributions (how many individuals of any given length/weight). Abundances are totaled into discrete size bins based on expected biomass at length and length + 1, and individuals that fall within those bins are totaled to get abundances across a continuous distribution of individual bodymass.
The next difference is that the individual body size data is presumed to follow a bounded power-law distribution, with a minimum and maximum body size. Using the individual size data, maximum likelihood estimation is used to solve for the parameter (b) that minimizes the negative log-likelihood.
# Plot the sizeSpectra slopes
(ss_patterns <- warmem_group_slopes %>%
ggplot(aes(yr, b, color = survey_area)) +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(season~survey_area) +
scale_color_gmri() +
labs(x = "",
y = "Size Spectrum Slope (b)",
color = "",
title = "Results from Area-Stratified Abundance - Edwards Method")
)
Prep and Clustering Code
# Reshaping
# Goal: Row for Years, columns for each different group
cluster_dat <- warmem_group_slopes %>%
select(Year, season, survey_area, b)
# Pivot wider
ss_dat <- cluster_dat %>%
pivot_wider(names_from = c(season, survey_area),
values_from = b,
names_sep = "_") %>%
column_to_rownames(var = "Year")
# Function to run chronological clustering
run_chron_clust <- function(in_dat){
# Get Euclidean Distances
eucdist <- vegdist(in_dat,
method = "euclidean",
binary = FALSE,
diag = FALSE,
upper = FALSE,
na.rm = TRUE)
# Perform Chronological Clustering on distances
cl <- chclust(eucdist, method = "coniss")
# Return list
return(list(eucdist = eucdist, cl = cl))
}
# Run for sizeSpectra Results
ss_chron <- run_chron_clust(in_dat = ss_dat)
Broomstick Plot
# broken stick model
vegan::bstick(ss_chron$cl, plot=T)
Dendrogram
plot(ss_chron$cl,
hang = -0.1,
axes = FALSE,
cex = 0.8,
horiz = F)
axis(side = 2, cex.axis = 1)
title("sizeSpectra Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)
ss_patterns +
geom_vline(aes(xintercept = 1987.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2008.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2004.5), color = "gray40", linetype = 2, size = 0.7) +
# geom_vline(aes(xintercept = 2001.5), color = "gray50", linetype = 3, size = 0.7) +
# geom_vline(aes(xintercept = 1980.5), color = "gray50", linetype = 3, size = 0.7) +
# geom_vline(aes(xintercept = 1979.5), color = "gray50", linetype = 3, size = 0.7) +
# geom_vline(aes(xintercept = 1973.5), color = "gray50", linetype = 3, size = 0.7) +
# geom_vline(aes(xintercept = 2007.5), color = "gray50", linetype = 3, size = 0.7) +
labs(subtitle = "Chronological Clustering Breakpoints - 3 or 8")
# plot trends of log10 slopes
(l10_patterns <- warmem_group_slopes %>%
ggplot(aes(yr, l10_slope_strat, color = survey_area)) +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(season~survey_area) +
scale_color_gmri() +
labs(x = "",
y = "Size Spectrum Slope (b)",
color = "",
title = "Results from Area-Stratified Abundance - log10 bins")
)
Prep and Clustering Code
# Reshaping
# Goal: Row for Years, columns for each different group
l10_cluster_dat <- warmem_group_slopes %>%
select(Year, season, survey_area, l10_slope_strat)
# Pivot wider
l10_dat <- l10_cluster_dat %>%
pivot_wider(names_from = c(season, survey_area),
values_from = l10_slope_strat,
names_sep = "_") %>%
column_to_rownames(var = "Year")
# Run clustering
l10_clust <- run_chron_clust(in_dat = l10_dat)
Broomstick Plot
# broken stick model
vegan::bstick(l10_clust$cl, plot=T)
Dendrogram
plot(l10_clust$cl,
hang = -0.1,
axes = FALSE,
cex = 0.8,
horiz = F)
axis(side = 2, cex.axis = 1.3)
title("Log10 Size Spectrum Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)
l10_patterns +
geom_vline(aes(xintercept = 1987.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1996.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1995.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2001.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2002.5), color = "gray40", linetype = 2, size = 0.7) +
# geom_vline(aes(xintercept = 1997.5), color = "gray50", linetype = 3, size = 0.7) +
# geom_vline(aes(xintercept = 1975.5), color = "gray50", linetype = 3, size = 0.7) +
# geom_vline(aes(xintercept = 1973.5), color = "gray50", linetype = 3, size = 0.7) +
labs(subtitle = "Chronological Clustering Breakpoints - 5 or 8 breaks")
# plot trends of log10 slopes
(int_patterns <- warmem_group_slopes %>%
ggplot(aes(yr, l10_int_strat, color = survey_area)) +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(season~survey_area) +
scale_color_gmri() +
labs(x = "",
y = "Size Spectrum Intercept",
color = "",
title = "Results from Area-Stratified Abundance - log10 bins")
)
Prep and Clustering Code
# Reshaping
# Goal: Row for Years, columns for each different group
l10_intercept_dat <- warmem_group_slopes %>%
select(Year, season, survey_area, l10_int_strat)
# Pivot wider
intercept_dat <- l10_intercept_dat %>%
pivot_wider(names_from = c(season, survey_area),
values_from = l10_int_strat,
names_sep = "_") %>%
column_to_rownames(var = "Year")
# Run Clustering
l10_ints <- run_chron_clust(in_dat = intercept_dat)
Broomstick Plot
# broken stick model
vegan::bstick(l10_ints$cl, plot=T)
Dendrogram
plot(l10_ints$cl,
hang = -0.1,
axes = FALSE,
cex = 0.8,
horiz = F)
axis(side = 2, cex.axis = 1.3)
title("Log10 Bins Intercept - Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)
# Re-plot patterns with breaks
int_patterns +
geom_vline(aes(xintercept = 1987.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2011.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1996.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1995.5), color = "gray50", linetype = 2, size = 0.7) +
labs(subtitle = "Chronological Clustering Breakpoints")
For each of the slope/intercepts derived using a linear model and binned data I also pulled the adjusted r-square to get a sense of whether or not certain groups had poor fits that should be investigated.
# Reshaping
# Goal: Row for Years, columns for each different group
l10_fit_dat <- warmem_group_slopes %>%
select(Year, season, survey_area, l10_slope_strat, l10_int_strat, l10_rsqr_strat) %>%
mutate(yr = as.numeric(Year))
l10_fit_dat %>%
ggplot(aes(yr, l10_rsqr_strat, color = survey_area)) +
geom_rect(xmin = -Inf, xmax = Inf, ymin = 0, ymax = 0.2,
fill = "gray80", color = "transparent") +
geom_hline(yintercept = 0.2, linetype = 2, size = 0.5, color = "gray50") +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(season~survey_area) +
scale_color_gmri() +
scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
labs(x = "",
y = "Linear Model R-Square",
color = "",
title = "Results from Area-Stratified Abundance - log10 bins")
# plot trends of log10 slopes
(sst_patterns <- regional_oisst %>%
ggplot(aes(yr, sst_anom, color = survey_area)) +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(~survey_area) +
scale_color_gmri() +
labs(x = "",
y = "Mean Temperature Anomaly",
color = "",
title = "")
)
Prep and Clustering Code
# Pivot OISST Wider
oisst_dat <- regional_oisst %>%
select(yr, survey_area, sst_anom) %>%
pivot_wider(names_from = survey_area, values_from = sst_anom) %>%
column_to_rownames(var = "yr")
# Run clustering
sst_clust <- run_chron_clust(oisst_dat)
Broomstick Plot
# broken stick model
vegan::bstick(sst_clust$cl, plot=T)
Dendrogram
plot(sst_clust$cl,
hang = -0.1,
axes = FALSE,
cex = 0.8,
horiz = F)
axis(side = 2, cex.axis = 1.3)
title("SST Anomalies - Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)
# Re-plot patterns with breaks
sst_patterns +
geom_vline(aes(xintercept = 2011.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1998.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2002.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2005.5), color = "gray50", linetype = 2, size = 0.7) +
labs(subtitle = "Chronological Clustering Breakpoints - 3 or 8")
A work by Adam A. Kemberling
Akemberling@gmri.org